
* Figure 4*
	*** Note: Shu Shen provided some of the code included in this file
		*I thank her for her help
		*Please attribute the formal testing code to her
		
*** Setup ***
clear all
set more off


*FC version

tempfile Pvaldata
set obs 1
gen hold = 1
save `Pvaldata', replace
	
	

use "$dir\DataSCLB", clear

	keep if Study_Arm !=1

	* Dmiss_BL   DTeachMale LTeachAge LTeachExper LTeachEduc Lenrol LPTR LPLEPassRate LNTeachers
	
		*gen girl = Gender==2 if !mi(Gender)
		 gen girl = male==0

	*Parameters
		*Bootstrap Reps
			*go with 1000 eventually
		local bsreps= 1000
		
	*seed for bootstraps
		global DDKseed=90039		
		
		
		*Test Percentiles
		global qtAll "0.2 .4 .6 .8" 
		global qtHigh "0.6 0.7  0.8 0.9" 
		global qtLow "0.1 0.2 0.3  0.4" 
		
		*Group Definitions of Variables to Test
		global Qgroups All High Low
		global Xgroups Xpre Xage Xgirl Xpregirl Xpreage Xagegirl
		
	
	
		
		*Number of age groups
		local Nagegroup = 3

		*pre-score renaming
		gen Ypre = L_BLabil

	

tempfile cleandata

save `cleandata'


************ Beginning Two Tests *************************
*Test 1: Endline rank similarity: Y by T, with Ypre as Xvar
	*tests whether there is rank similarity by pre-score across T/C
	*Interested near top of the endline distribution

*Test 2: Follow-up rank similarity: Y2 by T, with Y as Xvar
	*tests whether we can reject rank change in the ABSENCE of treatment
		*placebo test: intervention was over after endline

		*makes test 1 and test 2 a little more different than they have to be?

		
*Defining Test Program
	capture program drop test_exo_indqt
	program define test_exo_indqt, eclass 
		tempname diff 
		local iqt=iqt
		tempvar qtindicator rank rank1
		matrix `diff'=J(1,(numl-1),.)
		local qind=0
		gen `qtindicator'=0
		cumul y if t==0, generate(`rank')
		cumul y if t==1, generate(`rank1')
		qui replace `rank'=`rank1' if t==1
		qui replace `qtindicator'=(`rank'<=`iqt') 
		local lind=0
		foreach l of global level {
			local lind=`lind'+1
			qui sum `qtindicator' if t==1 & x==float(`l')
			scalar temp=r(mean)
			qui sum `qtindicator' if t==0 & x==float(`l')
			matrix `diff'[1,`lind']=r(mean)-temp
		}

		ereturn post `diff'
		end 
		

	capture program drop test_exo
	program define test_exo, eclass 
		tempname diff 
		if testmean==0 {
			tempvar qtindicator rank rank1
			matrix `diff'=J(1,numqt*(numl-1),.)
			local qind=0
			gen `qtindicator'=0
			cumul y if t==0, generate(`rank')
			cumul y if t==1, generate(`rank1')
			qui replace `rank'=`rank1' if t==1
			foreach indqt of global qt {
				local qind=`qind'+1
				qui replace `qtindicator'=(`rank'<=`indqt') 
				local lind=0
				foreach l of global level {
					local lind=`lind'+1
					qui sum `qtindicator' if t==1 & x==float(`l')
					scalar temp=r(mean)
					qui sum `qtindicator' if t==0 & x==float(`l')
					matrix `diff'[1,numqt*(`lind'-1)+`qind']=r(mean)-temp
				}
				*end loop over level
			}
			*end loop over qt
		}
		*end block to do if testmean==0
		else if testmean==1{
			tempvar rank rank1
			matrix `diff'=J(1,(numl-1),.)
			cumul y if t==0, generate(`rank')
			cumul y if t==1, generate(`rank1')
			replace `rank'=`rank1' if t==1
			foreach l of global level {
				local lind=`lind'+1
				qui sum `rank' if t==1 & x==float(`l')
				scalar temp=r(mean)
				qui sum `rank' if t==0 & x==float(`l')
				matrix `diff'[1,`lind']=r(mean)-temp
			}
			*end loop over level
		}
		*end block to do if testmean==1
		ereturn post `diff'
		end 
		

*Running Test

foreach X of global Xgroups {
	
	foreach Q of global Qgroups {
		
		global qt "${qt`Q'}"


			use `cleandata', clear

				gen y=EL_EGRA_PCA_Index
				gen t=MT_Program
				gen Xpre = L_BLabil>-0.09 if !mi(L_BLabil)
				egen Xage = cut(Lage), group(`Nagegroup')
				gen Xgirl = girl
				gen Xpregirl=Xgirl*1000+Xpre 
				gen Xpreage = Xpre*1000+Xage
				gen Xagegirl = Xgirl*1000+Xage
				gen x = `X'



			drop if x==.|y==.|t==.
			levelsof x, local(level)
			egen maxx=max(x)
			local maxx=maxx
			global level: list level-maxx
			by x, sort: gen nvals = _n == 1
			count if nvals
			scalar numl=r(N)
			by School_blind, sort: gen ntchs=_n==1
			count if ntchs
			scalar nclu=r(N)


			// testind=1 is quantile test
			// testind=2 is mean test
			// testind=3 is trying to figure otu which quantile is more
			// likely to be rejected.

			forvalues testind=1/3{
				if `testind'==1 {
					set seed $DDKseed
					scalar testmean=0
					local numqt: word count $qt
					scalar numqt=`numqt'
					test_exo
					matrix dm=e(b)
					qui bootstrap _b, cluster(School_blind) reps(`bsreps' ): test_exo 
					matrix hatv=e(V)
					scalar df=numqt*(numl-1)-diag0cnt(hatv)
					matrix wald=dm*invsym(hatv)*dm'
					local Pval_dist_r_g`X'_q`Q'=chi2tail(df,wald[1,1])
					di df
					di `Pval_dist_r_g`X'_q`Q''
				} 
				*end block to do if testind==1
				else if `testind'==2 {
					set seed $DDKseed
					scalar testmean=1
					test_exo
					matrix dm=e(b)
					qui bootstrap _b, cluster(School_blind) reps(`bsreps'): test_exo 
					matrix hatv=e(V)
					scalar df=(numl-1)-diag0cnt(hatv)
					matrix wald=dm*invsym(hatv)*dm'
					local Pval_mean_r_g`X'_q`Q'=chi2tail(df,wald[1,1])
					scalar dec1=(wald[1,1]>invchi2(df,0.95))
					di df
					di `Pval_mean_r_g`X'_q`Q''
				}
				*end block to do if testind==2
				else if `testind'==3 {
					set seed $DDKseed
					local allqt: word count $qt
					scalar allqt=`allqt'
					matrix bsstat=J(allqt,2,.)
					matrix df=J(allqt,1,.)
					scalar testmean=0
					local qind=0
					foreach indqt of global qt {
						scalar numqt=1
						scalar iqt=`indqt'
						local qind=`qind'+1
						test_exo_indqt
						matrix dm=e(b)
						qui bootstrap _b, cluster(School_blind) reps(`bsreps'): test_exo_indqt
						matrix hatv=e(V)
						matrix df[`qind',1]=(numl-1)-diag0cnt(hatv)
						matrix wald=dm*invsym(hatv)*dm'
						matrix bsstat[`qind',1]=wald[1,1]
						matrix bsstat[`qind',2]=1-chi2(df[`qind',1], wald[1,1])
					}
					mat list bsstat
					di "$qt"
				}
				*end block to do if testind==3
					
			}
			*end loop over testind


		gen Pval_mean_end = `Pval_mean_r_g`X'_q`Q''
		gen Pval_dist_end = `Pval_dist_r_g`X'_q`Q''
		gen X = "`X'"
		gen Q = "`Q'"

		contract Pval* X Q
		append using `Pvaldata'
		save `Pvaldata', replace

	}
	*end loop over Qgroups
}
*end loop over Xgroups

*** Graphing Results ***

	use `Pvaldata', clear
		drop if hold==1
		drop hold
		cd "$DDKsave"
		save "$dir/Pval_Test_Data_FC.dta", replace

	use "$dir/Pval_Test_Data_FC.dta", clear
	*Defining Groups for Graphs
	local Xgroups "Xpre Xpreage Xpregirl Xage Xgirl Xagegirl"

	local i = 1
	gen round = .
	foreach X of local Xgroups {
		replace round = `i' if X == "`X'"
		
		
		local i = `i' + 1
		}
		
	label define rounds 1 "Pre" 2 "Pre × Age" 3 "Pre × Sex"  4 "Age" 5 "Sex" 6 "Age × Sex"
	label values round rounds
	
	recode round (4=2) (5=3) (2=4) (3=5)
	label define rounds 1 "Pre" 2 "Age" 3 "Sex" 4 "Pre × Age" 5 "Pre × Sex" 6 "Age × Sex", replace

	*Endline Results

		sort round
		*set scheme modern
		twoway (scatter Pval_dist_end round if Q=="All", mc(maroon) ms(circle_hollow) msize(vlarge)) ///
			|| (scatter Pval_dist_end round if Q=="Low", mc(green) ms(square_hollow) msize(vlarge)) ///
			|| (scatter Pval_dist_end round if Q=="High", mc(blue) ms(diamond_hollow) msize(vlarge)) ///
			|| (scatter Pval_mean_end round if Q=="High", mc(black) msize(vlarge) ms(circle)), ///
		title("Rank Similarity {it:p}-values") ///
		legend(order(  2 "Low" 3 "High" 1 "All" 4 "Mean Test")) ///
		ytitle("P-value") ///
		ytitle("") subtitle("{it:p}-value", justification(left) margin(b t-1 l-7) bexpand) ///
		xtitle("") ///
		ylabel(0(0.2)1, angle(0)  format(%03.1f)) ///
		yline(0.1) yscale(r(0 1)) xscale(r(0 7)) xla(,valuelabel) ///
		note("Groups defined by 2 pre-score bins, 3 age bins, 2 gender bins, and interactions" ///
		"Low/High/All test different quantiles: Low (.1 .2 .3 .4), High (.6 .7 .8 .9) and All (.2 .4. 6 .8)" ///
		"Inference accounts for clustering at the school level") ///
		graphregion(color(white) fcolor(white) icolor(white) ifcolor(white) lcolor(white) ilcolor(white)) plotregion(color(white) fcolor(white) icolor(white) ifcolor(white) lcolor(white) ilcolor(white))
		graph export "$output/AppendixFigure3_FullCost.pdf", as(pdf) replace

	


*RC version
clear
tempfile Pvaldata
set obs 1
gen hold = 1
save `Pvaldata', replace
	


use "$dir\DataSCLB", clear

	keep if Study_Arm !=2
	
	
*	keep if runiform()<=0.05
	

	* Dmiss_BL   DTeachMale LTeachAge LTeachExper LTeachEduc Lenrol LPTR LPLEPassRate LNTeachers

		*gen girl = Gender==2 if !mi(Gender)
		gen girl = male == 0

	*Parameters
		*Bootstrap Reps
			*go with 1000 eventually
		local bsreps= 1000
		
	*seed for bootstraps
		global DDKseed=90039		
		
		
		*Test Percentiles
		global qtAll "0.2 .4 .6 .8" 
		global qtHigh "0.6 0.7  0.8 0.9" 
		global qtLow "0.1 0.2 0.3  0.4" 
		
		*Group Definitions of Variables to Test
		global Qgroups All High Low
		global Xgroups Xpre Xage Xgirl Xpregirl Xpreage Xagegirl
		
	
	
		
		*Number of age groups
		local Nagegroup = 3

		*pre-score renaming
		gen Ypre = L_BLabil

	

tempfile cleandata

save `cleandata'


************ Beginning Two Tests *************************
*Test 1: Endline rank similarity: Y by T, with Ypre as Xvar
	*tests whether there is rank similarity by pre-score across T/C
	*Interested near top of the endline distribution

*Test 2: Follow-up rank similarity: Y2 by T, with Y as Xvar
	*tests whether we can reject rank change in the ABSENCE of treatment
		*placebo test: intervention was over after endline

		*makes test 1 and test 2 a little more different than they have to be?

		
*Defining Test Program
	capture program drop test_exo_indqt
	program define test_exo_indqt, eclass 
		tempname diff 
		local iqt=iqt
		tempvar qtindicator rank rank1
		matrix `diff'=J(1,(numl-1),.)
		local qind=0
		gen `qtindicator'=0
		cumul y if t==0, generate(`rank')
		cumul y if t==1, generate(`rank1')
		qui replace `rank'=`rank1' if t==1
		qui replace `qtindicator'=(`rank'<=`iqt') 
		local lind=0
		foreach l of global level {
			local lind=`lind'+1
			qui sum `qtindicator' if t==1 & x==float(`l')
			scalar temp=r(mean)
			qui sum `qtindicator' if t==0 & x==float(`l')
			matrix `diff'[1,`lind']=r(mean)-temp
		}

		ereturn post `diff'
		end 
		

	capture program drop test_exo
	program define test_exo, eclass 
		tempname diff 
		if testmean==0 {
			tempvar qtindicator rank rank1
			matrix `diff'=J(1,numqt*(numl-1),.)
			local qind=0
			gen `qtindicator'=0
			cumul y if t==0, generate(`rank')
			cumul y if t==1, generate(`rank1')
			qui replace `rank'=`rank1' if t==1
			foreach indqt of global qt {
				local qind=`qind'+1
				qui replace `qtindicator'=(`rank'<=`indqt') 
				local lind=0
				foreach l of global level {
					local lind=`lind'+1
					qui sum `qtindicator' if t==1 & x==float(`l')
					scalar temp=r(mean)
					qui sum `qtindicator' if t==0 & x==float(`l')
					matrix `diff'[1,numqt*(`lind'-1)+`qind']=r(mean)-temp
				}
				*end loop over level
			}
			*end loop over qt
		}
		*end block to do if testmean==0
		else if testmean==1{
			tempvar rank rank1
			matrix `diff'=J(1,(numl-1),.)
			cumul y if t==0, generate(`rank')
			cumul y if t==1, generate(`rank1')
			replace `rank'=`rank1' if t==1
			foreach l of global level {
				local lind=`lind'+1
				qui sum `rank' if t==1 & x==float(`l')
				scalar temp=r(mean)
				qui sum `rank' if t==0 & x==float(`l')
				matrix `diff'[1,`lind']=r(mean)-temp
			}
			*end loop over level
		}
		*end block to do if testmean==1
		ereturn post `diff'
		end 
		

*Running Test

foreach X of global Xgroups {
	
	foreach Q of global Qgroups {
		
		global qt "${qt`Q'}"


			use `cleandata', clear

				gen y=EL_EGRA_PCA_Index
				gen t=CCT_Program
				gen Xpre = L_BLabil>-0.09 if !mi(L_BLabil)
				egen Xage = cut(Lage), group(`Nagegroup')
				gen Xgirl = girl
				gen Xpregirl=Xgirl*1000+Xpre 
				gen Xpreage = Xpre*1000+Xage
				gen Xagegirl = Xgirl*1000+Xage
				gen x = `X'



			drop if x==.|y==.|t==.
			levelsof x, local(level)
			egen maxx=max(x)
			local maxx=maxx
			global level: list level-maxx
			by x, sort: gen nvals = _n == 1
			count if nvals
			scalar numl=r(N)
			by School_blind, sort: gen ntchs=_n==1
			count if ntchs
			scalar nclu=r(N)


			// testind=1 is quantile test
			// testind=2 is mean test
			// testind=3 is trying to figure otu which quantile is more
			// likely to be rejected.

			forvalues testind=1/3{
				if `testind'==1 {
					set seed $DDKseed
					scalar testmean=0
					local numqt: word count $qt
					scalar numqt=`numqt'
					test_exo
					matrix dm=e(b)
					qui bootstrap _b, cluster(School_blind) reps(`bsreps' ): test_exo 
					matrix hatv=e(V)
					scalar df=numqt*(numl-1)-diag0cnt(hatv)
					matrix wald=dm*invsym(hatv)*dm'
					local Pval_dist_r_g`X'_q`Q'=chi2tail(df,wald[1,1])
					di df
					di `Pval_dist_r_g`X'_q`Q''
				} 
				*end block to do if testind==1
				else if `testind'==2 {
					set seed $DDKseed
					scalar testmean=1
					test_exo
					matrix dm=e(b)
					qui bootstrap _b, cluster(School_blind) reps(`bsreps'): test_exo 
					matrix hatv=e(V)
					scalar df=(numl-1)-diag0cnt(hatv)
					matrix wald=dm*invsym(hatv)*dm'
					local Pval_mean_r_g`X'_q`Q'=chi2tail(df,wald[1,1])
					scalar dec1=(wald[1,1]>invchi2(df,0.95))
					di df
					di `Pval_mean_r_g`X'_q`Q''
				}
				*end block to do if testind==2
				else if `testind'==3 {
					set seed $DDKseed
					local allqt: word count $qt
					scalar allqt=`allqt'
					matrix bsstat=J(allqt,2,.)
					matrix df=J(allqt,1,.)
					scalar testmean=0
					local qind=0
					foreach indqt of global qt {
						scalar numqt=1
						scalar iqt=`indqt'
						local qind=`qind'+1
						test_exo_indqt
						matrix dm=e(b)
						qui bootstrap _b, cluster(School_blind) reps(`bsreps'): test_exo_indqt
						matrix hatv=e(V)
						matrix df[`qind',1]=(numl-1)-diag0cnt(hatv)
						matrix wald=dm*invsym(hatv)*dm'
						matrix bsstat[`qind',1]=wald[1,1]
						matrix bsstat[`qind',2]=1-chi2(df[`qind',1], wald[1,1])
					}
					mat list bsstat
					
					di "$qt"
				}
				*end block to do if testind==3
					
			}
			*end loop over testind


		
		di "X = `X'"
		
		di "Q = `Q'"
		
		di "Pval_mean_r_g`X'_q`Q' = `Pval_mean_r_g`X'_q`Q''"
		
		*set trace on
		

		gen Pval_mean_end = `Pval_mean_r_g`X'_q`Q''
		gen Pval_dist_end = `Pval_dist_r_g`X'_q`Q''
		gen X = "`X'"
		gen Q = "`Q'"

		contract Pval* X Q
		append using `Pvaldata'
		save `Pvaldata', replace

	}
	*end loop over Qgroups
}
*end loop over Xgroups

*** Graphing Results ***

	use `Pvaldata', clear
		drop if hold==1
		drop hold
		cd "$DDKsave"
		save "$dir/Pval_Test_Data_RC.dta", replace

	use "$dir/Pval_Test_Data_RC.dta", clear
	*Defining Groups for Graphs
	local Xgroups "Xpre Xpreage Xpregirl Xage Xgirl Xagegirl"

	local i = 1
	gen round = .
	foreach X of local Xgroups {
		replace round = `i' if X == "`X'"
		
		
		local i = `i' + 1
		}
		
	label define rounds 1 "Pre" 2 "Pre × Age" 3 "Pre × Sex"  4 "Age" 5 "Sex" 6 "Age × Sex"
	label values round rounds
	
	recode round (4=2) (5=3) (2=4) (3=5)
	label define rounds 1 "Pre" 2 "Age" 3 "Sex" 4 "Pre × Age" 5 "Pre × Sex" 6 "Age × Sex", replace

	*Endline Results

		sort round
		*set scheme modern
		twoway (scatter Pval_dist_end round if Q=="All", mc(maroon) ms(circle_hollow) msize(vlarge)) ///
			|| (scatter Pval_dist_end round if Q=="Low", mc(green) ms(square_hollow) msize(vlarge)) ///
			|| (scatter Pval_dist_end round if Q=="High", mc(blue) ms(diamond_hollow) msize(vlarge)) ///
			|| (scatter Pval_mean_end round if Q=="High", mc(blac	k) msize(vlarge) ms(circle)), ///
		title("Rank Similarity {it:p}-values") ///
		legend(order(  2 "Low" 3 "High" 1 "All" 4 "Mean Test")) ///
		ytitle("P-value") ///
		ytitle("") subtitle("{it:p}-value", justification(left) margin(b t-1 l-7) bexpand) ///
		xtitle("") ///
		ylabel(0(0.2)1, angle(0)  format(%03.1f)) ///
		yline(0.1) yscale(r(0 1)) xscale(r(0 7)) xla(,valuelabel) ///
		note("Groups defined by 2 pre-score bins, 3 age bins, 2 gender bins, and interactions" ///
		"Low/High/All test different quantiles: Low (.1 .2 .3 .4), High (.6 .7 .8 .9) and All (.2 .4. 6 .8)" ///
		"Inference accounts for clustering at the school level") ///
		graphregion(color(white) fcolor(white) icolor(white) ifcolor(white) lcolor(white) ilcolor(white)) plotregion(color(white) fcolor(white) icolor(white) ifcolor(white) lcolor(white) ilcolor(white))
		graph export "$output/AppendixFigure3_ReducedCost.pdf", as(pdf) replace

	



